package com.momo.demo11_utils;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;

import com.momo.javabean.Article;
import com.momo.utils.Configuration;

/**
 * Minimal Lucene example: builds an index from a list of {@link Article}s and
 * runs a highlighted keyword search against it.
 * @author LiangJian 2011-6-17 11:56:14
 */
public class TestIndex {

	/** Index field holding the prefix-coded article id (stored, not analyzed). */
	private static final String FIELD_ID = "id";

	/** Index field holding the article title (stored, analyzed). */
	private static final String FIELD_TITLE = "title";

	/** Index field holding the article content (stored, analyzed). */
	private static final String FIELD_CONTENT = "content";

	/** Upper bound on the number of hits fetched by a single search. */
	private static final int MAX_HITS = 1000;

	/** Maximum size, in characters, of a highlighted fragment. */
	private static final int FRAGMENT_SIZE = 100;

	/**
	 * Creates a fresh Lucene index at {@code indexPath} containing one document
	 * per article. Any existing index at that location is overwritten, because
	 * the writer is opened with {@code create == true}.
	 *
	 * @param indexPath   directory of the index on disk
	 * @param articleList articles to index
	 * @throws Exception if the index cannot be opened or written
	 */
	public void createIndex(String indexPath, List<Article> articleList) throws Exception {
		// Record the start time so the indexing duration can be reported.
		long startTime = System.currentTimeMillis();
		FSDirectory directory = FSDirectory.open(new File(indexPath));
		try {
			// Build the index with the Paoding Chinese analyzer.
			IndexWriter indexWriter = new IndexWriter(directory, new PaodingAnalyzer(), true, MaxFieldLength.LIMITED);
			try {
				for (Article article : articleList) {
					Document doc = new Document();
					// The id is stored in prefix-coded form and must be decoded with
					// NumericUtils.prefixCodedToLong when read back.
					doc.add(new Field(FIELD_ID, NumericUtils.longToPrefixCoded(article.getId()), Store.YES, Index.NOT_ANALYZED));
					doc.add(new Field(FIELD_TITLE, article.getTitle(), Store.YES, Index.ANALYZED));
					doc.add(new Field(FIELD_CONTENT, article.getContent(), Store.YES, Index.ANALYZED));
					indexWriter.addDocument(doc);
				}
				// optimize() only merges index segments to speed up searching; the
				// added documents are committed by close() below.
				indexWriter.optimize();
			} finally {
				// Always close the writer so the index write lock is released,
				// even when indexing fails part-way through.
				indexWriter.close();
			}
		} finally {
			directory.close();
		}
		// Report how long indexing took.
		long endTime = System.currentTimeMillis();
		System.out.println("这花费了 " + (endTime - startTime) + "毫秒来把数据增加到索引里面去!");
	}

	/**
	 * Searches the {@code title} and {@code content} fields for the keyword and
	 * returns the matching articles with the matched terms highlighted.
	 *
	 * @param indexPath directory of the index on disk
	 * @param keyword   query text, parsed with the Lucene query syntax
	 * @return the matching articles (at most {@value #MAX_HITS}); empty if nothing matches
	 * @throws Exception if the index cannot be read or the keyword cannot be parsed
	 */
	public List<Article> searchByKeyWord(String indexPath, String keyword) throws Exception {
		List<Article> articleList = new ArrayList<Article>();
		FSDirectory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexSearcher search = new IndexSearcher(directory);
			try {
				long startTime = System.currentTimeMillis();

				// Fields to search. These must be the same names createIndex writes,
				// otherwise the query can never match a document.
				String[] searchFields = new String[] { FIELD_CONTENT, FIELD_TITLE };
				/*
				 * One Occur flag per search field, describing how the per-field
				 * clauses are combined: MUST means AND, MUST_NOT means NOT and
				 * SHOULD means OR.
				 */
				BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
				// Tokenize the keyword with the same analyzer used at index time.
				Analyzer analyzer = new PaodingAnalyzer();
				Query query = MultiFieldQueryParser.parse(Version.LUCENE_30, keyword, searchFields, clauses, analyzer);
				// No filter is applied.
				TopDocs topDocs = search.search(query, null, MAX_HITS);
				System.out.println("共匹配到：" + topDocs.totalHits + "个.");

				for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
					Document doc = search.doc(scoreDoc.doc);
					Article article = new Article();
					article.setId(NumericUtils.prefixCodedToLong(doc.get(FIELD_ID)));
					article.setTitle(this.getHighLight(doc, analyzer, query, FIELD_TITLE));
					article.setContent(this.getHighLight(doc, analyzer, query, FIELD_CONTENT));
					articleList.add(article);
				}
				long endTime = System.currentTimeMillis();
				System.out.println("检索耗时： " + (endTime - startTime) + "毫秒!");
			} finally {
				// Release the underlying index reader even when the search fails.
				search.close();
			}
		} finally {
			directory.close();
		}
		return articleList;
	}

	/**
	 * Returns the stored value of {@code field} with the terms matched by
	 * {@code query} wrapped in {@code <b>} tags.
	 *
	 * @param doc      document to read the stored field value from
	 * @param analyzer analyzer used to re-tokenize the stored value
	 * @param query    query whose terms are highlighted
	 * @param field    name of the field to highlight
	 * @return the best highlighted fragment; the unmodified field value when the
	 *         highlighter produces no fragment; or {@code null} when the document
	 *         has no stored value for {@code field}
	 * @throws Exception if tokenizing or highlighting fails
	 */
	public String getHighLight(Document doc, Analyzer analyzer, Query query, String field) throws Exception {
		String fieldValue = doc.get(field);
		if (fieldValue == null) {
			// Nothing stored for this field; there is nothing to highlight.
			return null;
		}
		// Matched terms are wrapped in <b>...</b>.
		SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b>", "</b>");
		Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
		highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
		TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(fieldValue));
		String highLightFieldValue = highlighter.getBestFragment(tokenStream, fieldValue);
		// Fall back to the raw value when no fragment was produced.
		return highLightFieldValue != null ? highLightFieldValue : fieldValue;
	}

	/**
	 * Creates the test data: 20 articles with ids 1 to 20.
	 *
	 * @return the generated articles
	 */
	public List<Article> createProductList() {
		List<Article> articleList = new ArrayList<Article>();
		for (int i = 1; i <= 20; i++) {
			Article article = new Article();
			article.setId(Long.valueOf(i));
			article.setTitle("手表" + i);
			article.setContent("手表的描述" + i + "块");
			articleList.add(article);
		}
		return articleList;
	}

	/**
	 * Demo entry point: builds the index from the test data, searches it and
	 * prints every hit.
	 *
	 * @param args unused
	 * @throws Exception if indexing or searching fails
	 */
	public static void main(String[] args) throws Exception {
		TestIndex test = new TestIndex();
		String indexPath = Configuration.getInstance().read("config.properties", "indexPath");
		// Build the Lucene index.
		test.createIndex(indexPath, test.createProductList());

		// Search the index.
		List<Article> articleList = test.searchByKeyWord(indexPath, "描述");
		// Print the search results.
		for (Article article : articleList) {
			System.out.println("---------------");
			System.out.println("Id:" + article.getId());
			System.out.println("title:" + article.getTitle());
			System.out.println("content:" + article.getContent());
			System.out.println("---------------");
		}

	}

}
